Data Read in & Wrangling

library(tswge)
library(dplyr)
library(tidyverse)
library(readxl)
library(vars)
library(nnfor)
# Set this to data folder for your file
#setwd("/Users/riccoferraro/Documents/SMU/TimeSeries/TimeSeriesFinalProject/data")
# NOTE(review): hard-coded, machine-specific setwd() makes the script
# non-portable; consider relative paths or here::here() so collaborators can run it.
setwd("/Users/mingyang/Desktop/SMU/TimeSeries/TimeSeriesFinalProject/data")
# Read in DFWA electricity Data. Both columns are read as character;
# AVG_EP is converted to numeric later when the combined data frame is built.
dfwa.electricity = read.csv("AVG_ELEC_DFWA_TX.csv",col.names = c("DATE", "AVG_EP"), 
                          colClasses = c(DATE="character", AVG_EP="character"))

# Read in CPI data for the Southern Urban area (BLS series; header rows skipped)
cpi <- read_excel("SouthernUrbanCPI.xlsx", sheet = "BLS Data Series", skip = 11)
# Keep only the monthly rows: drop half-year periods (codes containing "S")
# and the annual summary row (code "M13") in one filter
cpi <- cpi %>% filter(!grepl('S', Period), !grepl('M13', Period))

# Read in gas price data from the same area
gas.price <- read.csv("Dallas_FWA_GAS.csv")

# Read in temperature data and reshape from wide (one column per month,
# "Mon_*") to long format with a single Temperature column
temp <- read_excel("DallasAreaTemp.xlsx", sheet = "Sheet1") %>%
  tidyr::pivot_longer(
    cols = starts_with("Mon_"),
    names_to = "Month",
    values_to = "Temperature"
  )
# Keep the first 386 rows to match the span of the other series
temp <- temp[1:386, ]

# Align all three series to begin at 1990-01 and reset row names.
# Start indices were located once with which() (kept as notes below).

# which(dfwa.electricity$DATE == "1990-01-01") -> 135
dfwa.electricity <- dfwa.electricity[135:520, ]
rownames(dfwa.electricity) <- seq_len(nrow(dfwa.electricity))

# which(cpi$Year == 1990 & cpi$Period == "M01") -> 91
cpi <- cpi[91:476, ]
rownames(cpi) <- seq_len(nrow(cpi))

# which(gas.price$DATE == "1990-01-01") -> 145
gas.price <- gas.price[145:530, ]
rownames(gas.price) <- seq_len(nrow(gas.price))


#### Assemble the combined modeling data frame 'df' ####
df <- dfwa.electricity
df$CPI <- cpi$Value
df$GAS_P <- gas.price$APUS37A7471A
df$AVG_EP <- as.numeric(df$AVG_EP)  # price column was read in as character
df$TEMP <- temp$Temperature

#### Due to distribution market deregulation in 1995, the team decided to cut
#### the realization prior to 2000.
# which(df$DATE == "2000-01-01") -> 121
df <- df[121:386, ]
rownames(df) <- seq_len(nrow(df))

# Quick look at each series
plotts.wge(df$AVG_EP)

plotts.wge(df$CPI)

plotts.wge(df$GAS_P)

plotts.wge(df$TEMP)

EDA and Univariate Modeling

# The slowly damping sample autocorrelations (see $autplt output below)
# suggest a near-unit root, which supports differencing the data
plotts.sample.wge(df$AVG_EP)

## $xbar
## [1] 0.1177444
## 
## $autplt
##  [1] 1.0000000 0.9602995 0.9151642 0.8723585 0.8295085 0.7865279 0.7574500
##  [8] 0.7362720 0.7262652 0.7196580 0.7114933 0.7017810 0.6879721 0.6568276
## [15] 0.6238992 0.5937430 0.5625050 0.5322346 0.5184125 0.5104122 0.5059540
## [22] 0.5026686 0.4994694 0.4952331 0.4869328 0.4606747
## 
## $freq
##   [1] 0.003759398 0.007518797 0.011278195 0.015037594 0.018796992 0.022556391
##   [7] 0.026315789 0.030075188 0.033834586 0.037593985 0.041353383 0.045112782
##  [13] 0.048872180 0.052631579 0.056390977 0.060150376 0.063909774 0.067669173
##  [19] 0.071428571 0.075187970 0.078947368 0.082706767 0.086466165 0.090225564
##  [25] 0.093984962 0.097744361 0.101503759 0.105263158 0.109022556 0.112781955
##  [31] 0.116541353 0.120300752 0.124060150 0.127819549 0.131578947 0.135338346
##  [37] 0.139097744 0.142857143 0.146616541 0.150375940 0.154135338 0.157894737
##  [43] 0.161654135 0.165413534 0.169172932 0.172932331 0.176691729 0.180451128
##  [49] 0.184210526 0.187969925 0.191729323 0.195488722 0.199248120 0.203007519
##  [55] 0.206766917 0.210526316 0.214285714 0.218045113 0.221804511 0.225563910
##  [61] 0.229323308 0.233082707 0.236842105 0.240601504 0.244360902 0.248120301
##  [67] 0.251879699 0.255639098 0.259398496 0.263157895 0.266917293 0.270676692
##  [73] 0.274436090 0.278195489 0.281954887 0.285714286 0.289473684 0.293233083
##  [79] 0.296992481 0.300751880 0.304511278 0.308270677 0.312030075 0.315789474
##  [85] 0.319548872 0.323308271 0.327067669 0.330827068 0.334586466 0.338345865
##  [91] 0.342105263 0.345864662 0.349624060 0.353383459 0.357142857 0.360902256
##  [97] 0.364661654 0.368421053 0.372180451 0.375939850 0.379699248 0.383458647
## [103] 0.387218045 0.390977444 0.394736842 0.398496241 0.402255639 0.406015038
## [109] 0.409774436 0.413533835 0.417293233 0.421052632 0.424812030 0.428571429
## [115] 0.432330827 0.436090226 0.439849624 0.443609023 0.447368421 0.451127820
## [121] 0.454887218 0.458646617 0.462406015 0.466165414 0.469924812 0.473684211
## [127] 0.477443609 0.481203008 0.484962406 0.488721805 0.492481203 0.496240602
## [133] 0.500000000
## 
## $dbz
##   [1]  12.5686893  12.3173433  11.8982305  11.3114379  10.5579811   9.6409858
##   [7]   8.5676671   7.3524912   6.0218430   4.6198784   3.2132709   1.8890411
##  [13]   0.7381312  -0.1737122  -0.8283465  -1.2512906  -1.4891883  -1.5921087
##  [19]  -1.6091720  -1.5896992  -1.5821599  -1.6303232  -1.7694859  -2.0245855
##  [25]  -2.4100298  -2.9302313  -3.5799832  -4.3442938  -5.1978261  -6.1046537
##  [31]  -7.0196173  -7.8927223  -8.6769837  -9.3376650  -9.8587949 -10.2438964
##  [37] -10.5116138 -10.6894307 -10.8079797 -10.8966372 -10.9802874 -11.0772077
##  [43] -11.1981223 -11.3463448 -11.5187772 -11.7074923 -11.9016724 -12.0897493
##  [49] -12.2616000 -12.4105708 -12.5349379 -12.6383063 -12.7285740 -12.8154841
##  [55] -12.9072621 -13.0071528 -13.1107990 -13.2054874 -13.2722777 -13.2914789
##  [61] -13.2502907 -13.1493354 -13.0044255 -12.8426050 -12.6950565 -12.5905515
##  [67] -12.5514804 -12.5924245 -12.7202475 -12.9346975 -13.2288848 -13.5893678
##  [73] -13.9958765 -14.4210309 -14.8308363 -15.1871536 -15.4531999 -15.6016961
##  [79] -15.6227241 -15.5269821 -15.3423318 -15.1058464 -14.8557019 -14.6258190
##  [85] -14.4436045 -14.3297151 -14.2986430 -14.3593301 -14.5154180 -14.7649981
##  [91] -15.0998965 -15.5047232 -15.9562371 -16.4240086 -16.8735912 -17.2727527
##  [97] -17.5992900 -17.8466594 -18.0238263 -18.1492694 -18.2427639 -18.3189949
## [103] -18.3848575 -18.4401307 -18.4802628 -18.4998799 -18.4957898 -18.4686207
## [109] -18.4228343 -18.3655051 -18.3046120 -18.2475191 -18.1999932 -18.1657677
## [115] -18.1464773 -18.1417432 -18.1492234 -18.1645248 -18.1809693 -18.1893543
## [121] -18.1780207 -18.1336861 -18.0434445 -17.8979003 -17.6946141 -17.4404011
## [127] -17.1512365 -16.8496724 -16.5609037 -16.3089797 -16.1141036 -15.9911686
## [133] -15.9491882
# Try overfitting: fit a deliberately high-order AR (p = 16, Burg) and inspect
# the factor table for near-unit-root factors (model-ID heuristic, not a final model)
est = est.ar.wge(df$AVG_EP, p=16, type='burg')
##   
##   
## Coefficients of AR polynomial:  
## 1.1098 -0.0493 -0.1180 0.0863 -0.1701 0.1127 -0.0761 0.0744 0.0928 -0.2121 0.1797 0.3475 -0.4070 -0.0942 0.1370 -0.0341 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.9605B              1.0411               0.9605       0.0000
## 1-0.0146B+0.9208B^2    0.0079+-1.0421i      0.9596       0.2488
## 1+0.9509B+0.9131B^2   -0.5207+-0.9078i      0.9555       0.3329
## 1-1.6443B+0.9007B^2    0.9128+-0.5264i      0.9490       0.0833
## 1-0.9206B+0.8507B^2    0.5411+-0.9395i      0.9224       0.1668
## 1-0.9208B              1.0861               0.9208       0.0000
## 1+1.4952B+0.7387B^2   -1.0120+-0.5740i      0.8595       0.4179
## 1+0.8128B             -1.2303               0.8128       0.5000
## 1+0.7275B             -1.3745               0.7275       0.5000
## 1-0.6354B+0.1371B^2    2.3176+-1.3870i      0.3702       0.0858
##   
## 
# Compare the overfit factor table above to the factors of a pure
# seasonal polynomial 1 - B^12 (s = 12)
factor.wge(phi = c(rep(0,11),1))
##   
##   
## Coefficients of AR polynomial:  
## 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0000 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.0000B+1.0000B^2    0.5000+-0.8660i      1.0000       0.1667
## 1-1.0000B              1.0000               1.0000       0.0000
## 1-1.7321B+1.0000B^2    0.8660+-0.5000i      1.0000       0.0833
## 1+1.0000B+1.0000B^2   -0.5000+-0.8660i      1.0000       0.3333
## 1-0.0000B+1.0000B^2    0.0000+-1.0000i      1.0000       0.2500
## 1+1.7321B+1.0000B^2   -0.8660+-0.5000i      1.0000       0.4167
## 1+1.0000B             -1.0000               1.0000       0.5000
##   
## 
# By the overfitting method, the 1-B factor has the largest absolute reciprocal
# root, which by itself supports differencing the data.
# There is also evidence of 1-B^12 seasonality, although some of its factors
# (e.g. 1+B at system frequency 0.5 and 1+1.73B+B^2 at 0.4167) are not as close
# to the unit circle. Still worth exploring s = 12.

# First difference (1 - B)
d1 = artrans.wge(df$AVG_EP, phi.tr = 1)

# With the differenced data there seems to be some seasonal behavior at lag 12 left.
# NOTE(review): this applies a PARTIAL seasonal filter (coefficient 0.4 at lag 12),
# not a full 1-B^12 difference; the same 0.4 factor is reused when the final model
# is assembled below, so it appears deliberate — confirm that was intended.
d1.12 = artrans.wge(d1, phi.tr = c(rep(0,11),0.4))

dev.off()
## null device 
##           1
acf(d1.12) # ACF looks about white noise
ljung.wge(d1.12, K=24) # K=24: reject white noise (p = 0.025)
## Obs 0.1177406 0.09510556 -0.02821038 0.03577436 -0.05663082 -0.05190919 0.007935022 -0.02846601 0.06039309 -0.1174017 0.04320217 -0.1317076 0.06581953 -0.06049459 0.02239541 -0.06491078 -0.04911588 -0.1121 -0.1665538 -0.03136583 -0.05822815 0.03840348 0.03680909 0.1222457
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 39.35395
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.02506178
ljung.wge(d1.12, K=48) # K = 48 reject white noise 
## Obs 0.1177406 0.09510556 -0.02821038 0.03577436 -0.05663082 -0.05190919 0.007935022 -0.02846601 0.06039309 -0.1174017 0.04320217 -0.1317076 0.06581953 -0.06049459 0.02239541 -0.06491078 -0.04911588 -0.1121 -0.1665538 -0.03136583 -0.05822815 0.03840348 0.03680909 0.1222457 -0.009325486 -0.03049484 -0.01441944 0.03846121 -0.1092629 0.0477006 0.01390417 0.01907413 0.00746239 -0.02442159 0.01661073 0.1696413 0.03805096 0.0511036 0.03743145 -0.07413577 -0.1043787 -0.1778826 -0.08752393 -0.1099639 9.902894e-05 -0.04875424 0.09696807 0.1329072
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 84.83926
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.0008252289
# Thus the (partially) differenced series requires further modeling.

# est1.12 = aic5.wge(d1.12, p=0:15, q=0:5) # AIC picked p = 7, q=4
# est.1.12.bic = aic5.wge(d1.12, p=0:15, q=0:5, type = 'bic') # bic picked p=0, q=0
# est.1.12.aicc = aic5.wge(d1.12, p=0:15, q=0:5, type = 'aicc') # aicc picked p=1, q=0
# AIC leans toward a fancy model while BIC leans toward white noise;
# as a middle ground, first try a simple AR(1) on the transformed series.
params.est = est.arma.wge(d1.12, p=1)
##   
##   
## Coefficients of AR polynomial:  
## 0.1182 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.1182B              8.4627               0.1182       0.0000
##   
## 
acf(params.est$res) # residuals look about white noise
ljung.wge(params.est$res, K=24) # K=24: reject white noise (p = 0.0298)
## Obs -0.01014382 0.08726767 -0.04465155 0.04663999 -0.05631173 -0.04714291 0.01811757 -0.03731123 0.0796301 -0.1331266 0.07436933 -0.1485251 0.09073813 -0.07272263 0.03809915 -0.06359699 -0.02927703 -0.08955453 -0.154185 -0.005329216 -0.06060576 0.04209565 0.01856966 0.1223151
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 38.64312
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.02975776
ljung.wge(params.est$res, K=48) # K = 48reject null hypothesis of white noise
## Obs -0.01014382 0.08726767 -0.04465155 0.04663999 -0.05631173 -0.04714291 0.01811757 -0.03731123 0.0796301 -0.1331266 0.07436933 -0.1485251 0.09073813 -0.07272263 0.03809915 -0.06359699 -0.02927703 -0.08955453 -0.154185 -0.005329216 -0.06060576 0.04209565 0.01856966 0.1223151 -0.02076998 -0.02871357 -0.01578537 0.05426695 -0.1226772 0.06055979 0.006250035 0.01706408 0.008359714 -0.02802056 -0.0004448668 0.1679758 0.01269146 0.04367132 0.04119715 -0.06838042 -0.0771094 -0.1602163 -0.05547658 -0.102604 0.01950958 -0.06197392 0.08988801 0.1271899
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 79.38565
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.002934686
# Try the fancier ARMA(7,4) model identified by AIC.
# NOTE: this overwrites the AR(1) fit in params.est; the forecasting code
# below uses this ARMA(7,4) fit.
params.est = est.arma.wge(d1.12, p=7,q=4)
##   
##   
## Coefficients of AR polynomial:  
## -0.8600 -0.1354 -0.9232 -0.6079 0.1714 -0.0515 -0.0085 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.8898B+0.9402B^2   -1.0050+-0.2314i      0.9696       0.4640
## 1-0.7621B+0.8873B^2    0.4294+-0.9709i      0.9420       0.1837
## 1-0.3782B+0.0918B^2    2.0591+-2.5786i      0.3030       0.1428
## 1+0.1104B             -9.0565               0.1104       0.5000
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## -1.0151 -0.3327 -1.0074 -0.7941 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1-0.7997B+0.9310B^2    0.4295+-0.9432i      0.9649       0.1820
## 1+1.8148B+0.8530B^2   -1.0638+-0.2018i      0.9236       0.4702
##   
## 
acf(params.est$res) # residuals look about white noise
ljung.wge(params.est$res, K=24) # K=24: fail to reject white noise (p = 0.88)
## Obs 0.0008823033 0.007168264 0.01503579 -0.002268612 -0.04788816 0.004909812 -0.009460663 0.01731718 -0.01596821 -0.05734878 0.0002766386 -0.04294852 0.006864266 -0.00473022 -0.04453687 -0.005459694 -0.0516052 -0.04690075 -0.1718459 -0.00778543 -0.05177915 0.0191305 0.05259443 0.08994917
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 16.1709
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.8818087
ljung.wge(params.est$res, K=48) # K = 48 fail to reject null hypothesis of white noise
## Obs 0.0008823033 0.007168264 0.01503579 -0.002268612 -0.04788816 0.004909812 -0.009460663 0.01731718 -0.01596821 -0.05734878 0.0002766386 -0.04294852 0.006864266 -0.00473022 -0.04453687 -0.005459694 -0.0516052 -0.04690075 -0.1718459 -0.00778543 -0.05177915 0.0191305 0.05259443 0.08994917 0.01760575 -0.08784062 0.02203463 0.0346197 -0.103104 0.06085884 0.01483718 -0.008431453 0.01655047 -0.02007022 -0.02552383 0.2078729 -0.04307097 0.07704926 0.004884021 -0.02369241 -0.1080703 -0.107706 -0.09422072 -0.07229019 0.003069584 -0.0453473 0.07907943 0.134459
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 58.63666
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.1397722
# All models are wrong, some are useful - we will proceed with the fancier model for now.
# Combine the ARMA(7,4) AR factor with the 0.4 seasonal factor at lag 12;
# together with d = 1 in fore.arima.wge this is the ARIMA(7,1,4)-with-seasonal-factor model.
model1.param = mult.wge(fac1 = params.est$phi, fac2 = c(rep(0,11),0.4))
# lastn = TRUE: forecast the LAST 3 observed points (indices 264:266) for ASE.
# NOTE(review): this hold-out window differs from the VAR/MLP models below,
# which are trained on 1:230 and scored on 231:233 — the ASEs are not on the
# same test window and are not strictly comparable.
pred.short = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
                            d = 1, n.ahead = 3, limits = T, lastn = T)
ASE.short = mean((df$AVG_EP[264:266]-pred.short$f)^2)
ASE.short # 3.184093e-05 -> 0.0000318  #New 5.988891e-05 -> 0.0000599
## [1] 5.988891e-05
# Long-horizon version: forecast the last 36 observed points (indices 231:266)
pred.long = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
                            d = 1, n.ahead = 36, limits = T, lastn = T)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-pred.long$f)^2)
ASE.long # 0.0001725023
## [1] 0.0001725023
# Rolling-window RMSE cross-checks (slow; left commented out):
# rolling.res.short = roll.win.rmse.wge(df$AVG_EP,phi = model1.param$model.coef, theta = params.est$theta, d = 1, horizon = 3 )
# rolling.res.short # RMSE = 0.004, ASE = 1.6e-0.5 -> 0.000016, nums of windows = 239

# rolling.res.long = roll.win.rmse.wge(df$AVG_EP,phi = model1.param$model.coef, theta = params.est$theta, d = 1, horizon = 36)
# rolling.res.long # RMSE = 0.014, ASE= 0.000196, num of windows = 206

Univariate signal plus noise model

# Signal-plus-noise setup: regress the price on a linear time trend and
# examine the residuals (the detrended series).
x <- df$AVG_EP
n <- length(x)
t <- seq_len(n)
d <- lm(x ~ t)
# x.z: residuals from the fitted regression line (intercept + slope * t)
x.z <- x - (d$coefficients[1] + d$coefficients[2] * t)
plotts.sample.wge(x.z)

## $xbar
## [1] -1.589301e-16
## 
## $autplt
##  [1] 1.0000000 0.9489817 0.8860380 0.8248767 0.7634339 0.7020367 0.6633172
##  [8] 0.6378392 0.6311052 0.6288295 0.6223238 0.6178089 0.6051494 0.5567153
## [15] 0.5020588 0.4530965 0.4019963 0.3547485 0.3324767 0.3224324 0.3258818
## [22] 0.3325654 0.3401354 0.3471977 0.3461759 0.3106075
## 
## $freq
##   [1] 0.003759398 0.007518797 0.011278195 0.015037594 0.018796992 0.022556391
##   [7] 0.026315789 0.030075188 0.033834586 0.037593985 0.041353383 0.045112782
##  [13] 0.048872180 0.052631579 0.056390977 0.060150376 0.063909774 0.067669173
##  [19] 0.071428571 0.075187970 0.078947368 0.082706767 0.086466165 0.090225564
##  [25] 0.093984962 0.097744361 0.101503759 0.105263158 0.109022556 0.112781955
##  [31] 0.116541353 0.120300752 0.124060150 0.127819549 0.131578947 0.135338346
##  [37] 0.139097744 0.142857143 0.146616541 0.150375940 0.154135338 0.157894737
##  [43] 0.161654135 0.165413534 0.169172932 0.172932331 0.176691729 0.180451128
##  [49] 0.184210526 0.187969925 0.191729323 0.195488722 0.199248120 0.203007519
##  [55] 0.206766917 0.210526316 0.214285714 0.218045113 0.221804511 0.225563910
##  [61] 0.229323308 0.233082707 0.236842105 0.240601504 0.244360902 0.248120301
##  [67] 0.251879699 0.255639098 0.259398496 0.263157895 0.266917293 0.270676692
##  [73] 0.274436090 0.278195489 0.281954887 0.285714286 0.289473684 0.293233083
##  [79] 0.296992481 0.300751880 0.304511278 0.308270677 0.312030075 0.315789474
##  [85] 0.319548872 0.323308271 0.327067669 0.330827068 0.334586466 0.338345865
##  [91] 0.342105263 0.345864662 0.349624060 0.353383459 0.357142857 0.360902256
##  [97] 0.364661654 0.368421053 0.372180451 0.375939850 0.379699248 0.383458647
## [103] 0.387218045 0.390977444 0.394736842 0.398496241 0.402255639 0.406015038
## [109] 0.409774436 0.413533835 0.417293233 0.421052632 0.424812030 0.428571429
## [115] 0.432330827 0.436090226 0.439849624 0.443609023 0.447368421 0.451127820
## [121] 0.454887218 0.458646617 0.462406015 0.466165414 0.469924812 0.473684211
## [127] 0.477443609 0.481203008 0.484962406 0.488721805 0.492481203 0.496240602
## [133] 0.500000000
## 
## $dbz
##   [1]  12.0998003  11.8768937  11.5060831  10.9887309  10.3274249   9.5267949
##   [7]   8.5947728   7.5444633   6.3967899   5.1839353   3.9530584   2.7684781
##  [13]   1.7084627   0.8518069   0.2541335  -0.0745938  -0.1743568  -0.1177711
##  [19]   0.0144354   0.1506087   0.2347710   0.2264534   0.0979525  -0.1686171
##  [25]  -0.5835238  -1.1505684  -1.8670628  -2.7225211  -3.6959803  -4.7522406
##  [31]  -5.8384878  -6.8849898  -7.8152450  -8.5671197  -9.1146917  -9.4734693
##  [37]  -9.6849974  -9.7957781  -9.8450866  -9.8632716  -9.8747239  -9.9004990
##  [43]  -9.9588765 -10.0643331 -10.2259892 -10.4462858 -10.7202438 -11.0355161
##  [49] -11.3735133 -11.7119071 -12.0284324 -12.3050393 -12.5306170 -12.7007014
##  [55] -12.8141695 -12.8688826 -12.8591215 -12.7768723 -12.6169175 -12.3831459
##  [61] -12.0922015 -11.7721162 -11.4569520 -11.1806198 -10.9724449 -10.8551599
##  [67] -10.8446711 -10.9506198 -11.1769803 -11.5222336 -11.9788437 -12.5318600
##  [73] -13.1565860 -13.8156409 -14.4567368 -15.0142647 -15.4190440 -15.6175395
##  [79] -15.5923708 -15.3692278 -15.0045085 -14.5644123 -14.1091339 -13.6864056
##  [85] -13.3313740 -13.0688944 -12.9160214 -12.8839002 -12.9789154 -13.2031287
##  [91] -13.5540331 -14.0236183 -14.5967762 -15.2492900 -15.9461957 -16.6421910
##  [97] -17.2864000 -17.8325758 -18.2515646 -18.5385622 -18.7097299 -18.7909681
## [103] -18.8068600 -18.7752305 -18.7074038 -18.6113240 -18.4946431 -18.3661466
## [109] -18.2353212 -18.1108378 -17.9989737 -17.9027150 -17.8218228 -17.7537556
## [115] -17.6950803 -17.6428399 -17.5953139 -17.5517708 -17.5111609 -17.4701294
## [121] -17.4211213 -17.3516003 -17.2454122 -17.0867976 -16.8662352 -16.5856744
## [127] -16.2602995 -15.9157677 -15.5825591 -15.2902399 -15.0635146 -14.9203412
## [133] -14.8714355
# Fit an AR model (order chosen by AIC over p = 0:15) to the detrended residuals
ar.z = aic.wge(x.z, p=0:15)
# Cochrane-Orcutt style: transform both x and t by the fitted AR operator so
# the trend regression below has (approximately) uncorrelated errors
x.trans = artrans.wge(x, phi.tr = ar.z$phi)

t.trans = artrans.wge(t, phi.tr = ar.z$phi)

# Re-test the significance of the slope on the transformed data
fit = lm(x.trans~ t.trans)
summary(fit)
## 
## Call:
## lm(formula = x.trans ~ t.trans)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.014364 -0.002770 -0.000122  0.002314  0.016179 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.724e-03  6.588e-04  13.243  < 2e-16 ***
## t.trans     1.957e-04  4.296e-05   4.554 8.22e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004589 on 250 degrees of freedom
## Multiple R-squared:  0.07661,    Adjusted R-squared:  0.07291 
## F-statistic: 20.74 on 1 and 250 DF,  p-value: 8.22e-06
# After accounting for serial correlation (apparently AR(14), consistent with
# the 250 residual df above), there is strong evidence the slope differs from
# zero (p < 0.0001), so we take the trend out and proceed.

# Cross-check with a bootstrap-based trend test (sn fixes the seed for reproducibility)
wbg.boot.wge(x, sn=103)
## $p
## [1] 2
## 
## $phi
## [1]  1.1384970 -0.1616007
## 
## $pv
## [1] 0.1077694
# Bootstrap-based test for trend failed to reject with p-value = 0.1078;
# we will nevertheless proceed to see how the trend model performs.
# Fit AR(14) (Burg) to the detrended residuals for the signal-plus-noise model
ar.est = est.ar.wge(x = x.z, p=14, type = 'burg')
##   
##   
## Coefficients of AR polynomial:  
## 1.0829 -0.0764 -0.0694 0.0897 -0.1963 0.1276 -0.0738 0.0629 0.1052 -0.2335 0.1907 0.3402 -0.3996 -0.0021 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.6619B+0.9161B^2    0.9071+-0.5185i      0.9571       0.0826
## 1-1.9020B+0.9086B^2    1.0467+-0.0712i      0.9532       0.0108
## 1-0.0219B+0.9024B^2    0.0121+-1.0526i      0.9500       0.2482
## 1+0.9609B+0.8985B^2   -0.5348+-0.9094i      0.9479       0.3346
## 1-0.9484B+0.8455B^2    0.5608+-0.9318i      0.9195       0.1638
## 1+0.9010B             -1.1099               0.9010       0.5000
## 1+1.5840B+0.7807B^2   -1.0145+-0.5017i      0.8836       0.4269
## 1+0.0053B             -189.3618               0.0053       0.5000
##   
## 
dev.off()
## null device 
##           1
# Residual diagnostics for the AR(14) fit on the detrended series
acf(ar.est$res)
ljung.wge(ar.est$res) # K = 24 (see output): fail to reject white noise (p = 0.935)
## Obs -0.003038258 0.03005427 -0.02099706 -0.0002871221 0.05986671 0.01695036 0.0530138 0.02703745 -0.004438696 0.004995789 -0.0291746 -0.1051625 0.01936145 -0.04933376 0.02083684 -0.04027929 0.03924574 -0.02867814 -0.1059654 0.0128511 -0.06517079 0.04325944 0.02233475 0.07069181
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 24
## 
## $chi.square
## [1] 14.5005
## 
## $df
## [1] 24
## 
## $pval
## [1] 0.934527
ljung.wge(ar.est$res, K=48)
## Obs -0.003038258 0.03005427 -0.02099706 -0.0002871221 0.05986671 0.01695036 0.0530138 0.02703745 -0.004438696 0.004995789 -0.0291746 -0.1051625 0.01936145 -0.04933376 0.02083684 -0.04027929 0.03924574 -0.02867814 -0.1059654 0.0128511 -0.06517079 0.04325944 0.02233475 0.07069181 -0.02561514 -0.02072488 -0.02436513 0.07604687 -0.1057342 0.1075884 0.06789439 0.01064667 0.03701851 -0.04101051 -0.02696131 0.1481545 -0.02702826 0.07297504 -0.015737 -0.03237783 -0.06675938 -0.1181091 -0.0329735 -0.1023404 -0.008890106 -0.05093476 0.05829844 0.1212425
## $test
## [1] "Ljung-Box test"
## 
## $K
## [1] 48
## 
## $chi.square
## [1] 51.63008
## 
## $df
## [1] 48
## 
## $pval
## [1] 0.3338754
# Both tests FAIL to reject white noise (p = 0.935 and 0.334), so the AR fit
# to the detrended series is adequate.
# Signal-plus-noise forecast: linear trend plus AR noise (order selected up to
# max.p = 15); lastn = TRUE forecasts the last 3 observed points for ASE
m2.result = fore.sigplusnoise.wge(x, linear = TRUE, max.p = 15, n.ahead = 3, lastn = TRUE, limits=TRUE)
##   
##   
## Coefficients of AR polynomial:  
## 1.0866 -0.0799 -0.0683 0.0920 -0.1896 0.1230 -0.0848 0.0812 0.0994 -0.2389 0.2041 0.3393 -0.4123 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.6637B+0.9197B^2    0.9044+-0.5189i      0.9590       0.0829
## 1-1.9090B+0.9150B^2    1.0432+-0.0685i      0.9566       0.0104
## 1-0.0231B+0.9079B^2    0.0127+-1.0494i      0.9528       0.2481
## 1+0.9692B+0.9021B^2   -0.5372+-0.9055i      0.9498       0.3352
## 1-0.9461B+0.8493B^2    0.5570+-0.9312i      0.9216       0.1642
## 1+0.9029B             -1.1075               0.9029       0.5000
## 1+1.5832B+0.7800B^2   -1.0148+-0.5021i      0.8832       0.4269
##   
## 
# Short-horizon ASE against the last 3 observations (indices 264:266)
ASE.short.m2 = mean((df$AVG_EP[264:266]-m2.result$f)^2)
ASE.short.m2 # 6.718437e-05 -> 0.0000672
## [1] 6.718437e-05
# Long-horizon (36-step) signal-plus-noise forecast of the last 36 observations
m2.result.long = fore.sigplusnoise.wge(x, linear = TRUE, max.p = 15, n.ahead = 36, lastn = TRUE, limits=TRUE)
##   
##   
## Coefficients of AR polynomial:  
## 1.0866 -0.0799 -0.0683 0.0920 -0.1896 0.1230 -0.0848 0.0812 0.0994 -0.2389 0.2041 0.3393 -0.4123 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.6637B+0.9197B^2    0.9044+-0.5189i      0.9590       0.0829
## 1-1.9090B+0.9150B^2    1.0432+-0.0685i      0.9566       0.0104
## 1-0.0231B+0.9079B^2    0.0127+-1.0494i      0.9528       0.2481
## 1+0.9692B+0.9021B^2   -0.5372+-0.9055i      0.9498       0.3352
## 1-0.9461B+0.8493B^2    0.5570+-0.9312i      0.9216       0.1642
## 1+0.9029B             -1.1075               0.9029       0.5000
## 1+1.5832B+0.7800B^2   -1.0148+-0.5021i      0.8832       0.4269
##   
## 
# Long-horizon ASE against the last 36 observations (indices 231:266)
ASE.long.m2 = mean((df$AVG_EP[(266-36+1):266]-m2.result.long$f)^2)
ASE.long.m2 # 0.0001268439
## [1] 0.0001268439

Multivariate - VAR model

# Train/test split for the VAR models: first 230 months train, last 36 test.
# Columns 2:5 are AVG_EP, CPI, GAS_P, TEMP.
train.var = df[1:230,2:5]
test.var = df[231:266,2:5]

# Prior to fitting, look at cross-correlation plots to get an idea.
# CPI vs electricity price:
ccf(train.var$CPI, train.var$AVG_EP)

# Looks like Gas Price lag 2 is most correlated with electricity price
ccf(train.var$GAS_P, train.var$AVG_EP)

# Lag 0 or 1 of temperature seems most correlated with electricity price
# (although this correlation is weaker compared to Gas and CPI)
ccf(train.var$TEMP, train.var$AVG_EP)

# AIC and FPE selected lag = 11, Schwarz Criterion lag = 6, Hannan-Quinn lag = 3.
# Try them all and compare their short-term and long-term ASE.
VARselect(train.var, lag.max=15, type="both", season = NULL, exogen = NULL)
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##     11      6      3     11 
## 
## $criteria
##                    1             2             3             4             5
## AIC(n) -1.283887e+01 -1.379340e+01 -1.430460e+01 -1.446431e+01 -1.457456e+01
## HQ(n)  -1.268684e+01 -1.354002e+01 -1.394988e+01 -1.400824e+01 -1.401713e+01
## SC(n)  -1.246261e+01 -1.316630e+01 -1.342667e+01 -1.333554e+01 -1.319495e+01
## FPE(n)  2.655678e-06  1.022636e-06  6.136368e-07  5.234925e-07  4.694594e-07
##                    6             7             8             9            10
## AIC(n) -1.468531e+01 -1.461657e+01 -1.462841e+01 -1.469184e+01 -1.476153e+01
## HQ(n)  -1.402654e+01 -1.385645e+01 -1.376693e+01 -1.372901e+01 -1.369735e+01
## SC(n)  -1.305486e+01 -1.273529e+01 -1.249629e+01 -1.230887e+01 -1.212773e+01
## FPE(n)  4.210328e-07  4.521535e-07  4.483443e-07  4.226040e-07  3.962645e-07
##                   11            12            13            14            15
## AIC(n) -1.484743e+01 -1.479751e+01 -1.484364e+01 -1.478650e+01 -1.472243e+01
## HQ(n)  -1.368191e+01 -1.353063e+01 -1.347541e+01 -1.331692e+01 -1.315150e+01
## SC(n)  -1.196279e+01 -1.166203e+01 -1.145733e+01 -1.114935e+01 -1.083443e+01
## FPE(n)  3.660212e-07  3.877759e-07  3.737362e-07  4.000256e-07  4.318945e-07
# Try lag = 11 (AIC/FPE choice); type = "both" includes constant and trend
lsfit.p11 = VAR(train.var, p = 11, type ="both")
preds.p11.short = predict(lsfit.p11, n.ahead = 3)
preds.p11.long = predict(lsfit.p11, n.ahead = 36)

# fcst$AVG_EP column 1 is the point forecast; compare against the hold-out
test_ASE_p11_short = mean((preds.p11.short$fcst$AVG_EP[,1]-test.var$AVG_EP[1:3])^2)
test_ASE_p11_short # 1.40152e-05 -> 0.000014
## [1] 1.40152e-05
test_ASE_p11_long = mean((preds.p11.long$fcst$AVG_EP[,1]-test.var$AVG_EP[1:36])^2)
test_ASE_p11_long # 0.0001447111
## [1] 0.0001447111
# Try lag = 6 (Schwarz/BIC choice)
lsfit.p6 = VAR(train.var, p = 6, type ="both")
preds.p6.short = predict(lsfit.p6, n.ahead = 3)
preds.p6.long = predict(lsfit.p6, n.ahead = 36)

test_ASE_p6_short = mean((preds.p6.short$fcst$AVG_EP[,1]-test.var$AVG_EP[1:3])^2)
test_ASE_p6_short # 9.127952e-06 -> 0.00000913
## [1] 9.127952e-06
test_ASE_p6_long = mean((preds.p6.long$fcst$AVG_EP[,1]-test.var$AVG_EP[1:36])^2)
test_ASE_p6_long # 0.0001166065
## [1] 0.0001166065
# Try lag = 3 (Hannan-Quinn choice) — best short- and long-term ASE of the three
lsfit.p3 = VAR(train.var, p = 3, type ="both")
preds.p3.short = predict(lsfit.p3, n.ahead = 3)
preds.p3.long = predict(lsfit.p3, n.ahead = 36)

test_ASE_p3_short = mean((preds.p3.short$fcst$AVG_EP[,1]-test.var$AVG_EP[1:3])^2)
test_ASE_p3_short # 2.780987e-06 -> 0.000002781
## [1] 2.780987e-06
test_ASE_p3_long = mean((preds.p3.long$fcst$AVG_EP[,1]-test.var$AVG_EP[1:36])^2)
test_ASE_p3_long # 9.241909e-05 -> 0.00009242
## [1] 9.241909e-05

Visualization for VAR best model lag = 3

# Short Term Forecast - VAR best model (p = 3)

# Plot the observed series over an index window, then overlay the forecast
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast VAR")
# Point forecast (red), anchored at the last training value (index 230) so the line connects
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Forecast limits (dotted blue): fcst columns 3 (upper) and 2 (lower)
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(230:233,c(df$AVG_EP[230:230],preds.p3.short$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')

# Long Term Forecast - VAR best model (p = 3)

# Full-series plot with the 36-step forecast overlaid
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast VAR")
# Point forecast (red) anchored at the last training value (index 230)
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,1]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
# Forecast limits (dotted blue): fcst columns 3 (upper) and 2 (lower)
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,3]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(230:266,c(df$AVG_EP[230:230],preds.p3.long$fcst$AVG_EP[,2]),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')

Multilayer Perceptron (MLP) - Neural Network Model

# Training series: first 230 months of AVG_EP (column 2), monthly from 2000-01
df.train = ts(df[1:230,2], start = c(2000,1), frequency = 12)
# Hold-out: last 36 months; starts 2019-03 given the 2000-01 origin
# NOTE(review): df.test is not referenced below in this excerpt (the ASEs use
# df$AVG_EP directly) — confirm it is used elsewhere or drop it.
df.test = ts(df[231:266,2], start=c(2019,3), frequency = 12)

# Exogenous regressors for training: CPI, GAS_P, TEMP (columns 3:5)
other.predictors.train = df[1:230,3:5]
set.seed(2)
# MLP: 5 hidden nodes, 20 repetitions combined by median, lag 1 of each
# regressor, no deterministic seasonality, no differencing, fixed (not selected) lags
fit.mlp1 = mlp(df.train, reps = 20, comb = "median", xreg = other.predictors.train,
               xreg.lags = c(1,1,1), allow.det.season = FALSE, hd = 5, difforder = 0, sel.lag = FALSE)
fit.mlp1
## MLP fit with 5 hidden nodes and 20 repetitions.
## Univariate lags: (1,2,3,4,5,6,7,8,9,10,11,12)
## 3 regressors included.
## - Regressor 1 lags: (1)
## - Regressor 2 lags: (1)
## - Regressor 3 lags: (1)
## Forecast combined using the median operator.
## MSE: 0.
plot(fit.mlp1)

# The MLP needs future values of each exogenous regressor, so each regressor is
# itself forecast 36 steps ahead with its own seasonal MLP (seed fixed before
# each fit for reproducibility).

# CPI: fit a seasonal MLP on the training window and forecast 36 months ahead
set.seed(2)
fit.mlp.cpi <- mlp(
  ts(df$CPI[1:230], start = c(2000, 1), frequency = 12),
  reps = 20, comb = "median", allow.det.season = TRUE
)
fore.mlp.cpi <- forecast(fit.mlp.cpi, h = 36)
plot(fore.mlp.cpi)

# Gas price: same recipe
set.seed(2)
fit.mlp.gas <- mlp(
  ts(df$GAS_P[1:230], start = c(2000, 1), frequency = 12),
  reps = 20, comb = "median", allow.det.season = TRUE
)
fore.mlp.gas <- forecast(fit.mlp.gas, h = 36)
plot(fore.mlp.gas)

# Temperature: same recipe
set.seed(2)
fit.mlp.temp <- mlp(
  ts(df$TEMP[1:230], start = c(2000, 1), frequency = 12),
  reps = 20, comb = "median", allow.det.season = TRUE
)
fore.mlp.temp <- forecast(fit.mlp.temp, h = 36)
plot(fore.mlp.temp)

# Combine observed regressors (rows 1:230) with their MLP forecasts (36 rows)
# so forecast() can look up future exogenous values.
# BUG FIX: the third column was mistakenly named GAS_P (a duplicate, which
# data.frame silently renames to GAS_P.1); it holds temperature and must be
# named TEMP to match the xreg columns (CPI, GAS_P, TEMP) used to fit fit.mlp1.
other.predictors.with.forecast = data.frame(CPI = c(df$CPI[1:230],fore.mlp.cpi$mean[1:36]),
                                            GAS_P = c(df$GAS_P[1:230],fore.mlp.gas$mean[1:36]),
                                            TEMP = c(df$TEMP[1:230],fore.mlp.temp$mean[1:36]))
# Short-horizon (3-month) forecast and ASE on the hold-out (indices 231:233)
fore.mlp1.short = forecast(fit.mlp1, h=3, xreg = other.predictors.with.forecast)
test_ASE_mlp1_short = mean((fore.mlp1.short$mean[1:3]-df$AVG_EP[231:233])^2)
test_ASE_mlp1_short # 1.947679e-05 -> 0.00001947
## [1] 1.947679e-05
# Long-horizon (36-month) forecast and ASE (indices 231:266)
fore.mlp1.long = forecast(fit.mlp1, h=36, xreg = other.predictors.with.forecast)
test_ASE_mlp1_long = mean((fore.mlp1.long$mean[1:36]-df$AVG_EP[231:266])^2)
test_ASE_mlp1_long # 0.000111
## [1] 0.0001109964

Visualization for NN Model

# Short Term Forecast - MLP model
# Plot actual prices for t = 200..233 and overlay the 3-step MLP forecast
# (in red), anchored at the last training point t = 230 so the lines connect.
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast MLP")
points(230:233,c(df$AVG_EP[230:230],fore.mlp1.short$mean[1:3]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)

# Long Term Forecast - MLP model
# (comment corrected: this chunk plots the MLP long-term forecast, not VAR)
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast MLP")
points(230:266,c(df$AVG_EP[230:230],fore.mlp1.long$mean[1:36]),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)

Ensemble ARIMA(7,1,4), Best VAR and MLP

# Ensemble forecast: simple (unweighted) average of the three competing
# models — ARIMA (pred.short / pred.long), best VAR (preds.p3.*) and MLP
# (fore.mlp1.*) — evaluated against the same holdout windows as above.
# NOTE(review): pred.short / preds.p3.* are created elsewhere in the script.
arima.f.short = pred.short$f
var.f.short = preds.p3.short$fcst$AVG_EP[,1]
mlp.f.short = fore.mlp1.short$mean[1:3]
ensemble.forecast.short = (arima.f.short + var.f.short + mlp.f.short)/3
test_ASE_ensemble1.short = mean((ensemble.forecast.short-df$AVG_EP[231:233])^2)
test_ASE_ensemble1.short # 3.312542e-06 -> 0.0000033
## [1] 3.312542e-06
arima.f.long = pred.long$f
var.f.long = preds.p3.long$fcst$AVG_EP[,1]
mlp.f.long = fore.mlp1.long$mean[1:36]
ensemble.forecast.long = (arima.f.long + var.f.long + mlp.f.long)/3
test_ASE_ensemble1.long = mean((ensemble.forecast.long-df$AVG_EP[231:266])^2)
test_ASE_ensemble1.long # 7.819804e-05 -> 0.0000782
## [1] 7.819804e-05

Visualization for Ensemble Model

# Short Term Forecast - Ensemble model
# Same plotting layout as the MLP charts: actuals in black, ensemble forecast
# overlaid in red, anchored at t = 230 so the series connect visually.
t = 200:233
plot(t, df$AVG_EP[200:233], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(200, 236),ylim=c(0.10,0.16),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.1,1.1,1.1),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Short Term Average Electricity Price Forecast Ensemble")
points(230:233,c(df$AVG_EP[230:230],ensemble.forecast.short),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)

# Long Term Forecast - Ensemble best model
t = 1:266
plot(t, df$AVG_EP[1:266], type='l',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1, 266),ylim=c(0.05,0.18),col=1)
axis(side=1,cex.axis=1.0,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.0,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.0,1.0,1.0),text=c("Time","Price",""),line=c(1.2,2.1,1.8))
title("Long Term Average Electricity Price Forecast Ensemble")
points(230:266,c(df$AVG_EP[230:230],ensemble.forecast.long),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)

Visualization

library(ggplot2)
# Convert the DATE strings to Date objects so ggplot can use them on the x-axis.
df[['DATE']] <- as.Date(df[['DATE']], format='%Y-%m-%d')

# Short-term (3 month) forecast beyond the end of the sample (lastn = F)
# using the ARIMA fit from earlier in the script.
# NOTE(review): model1.param / params.est are defined elsewhere — confirm this
# chunk runs after the section that creates them.
pred.short = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
                            d = 1, n.ahead = 3, limits = T, lastn = F)
dev.off()
# Base-graphics version of the short-term forecast plot: actuals for
# t = 250..266 with the ARIMA(15,1,5) forecast and its limits in color.
# NOTE(review): f.short.15.5 is created much later in this script (the
# ARIMA(15,1,5) section), so this chunk only runs after that section.
# Also, f.short.15.5 was generated with lastn=TRUE (it forecasts t = 264:266)
# yet is plotted here at t = 267:269 — confirm which placement is intended.
t = 250:266
plot(t, df$AVG_EP[250:266], type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(250, 270),ylim=c(0.12,0.175),col=1)
axis(side=1,cex.axis=1.1,mgp=c(3,0.15,0),tcl=-.3)
axis(side=2,las=1,cex.axis=1.1,mgp=c(3,.4,0),tcl=-.3)
mtext(side=c(1,2,1),cex=c(1.2,1.2,1.2),text=c("Time","",""),line=c(1.2,2.1,1.8))
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$f),type='o',lty=1,cex=.6,lwd=1,pch=1,col=2)
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$ul),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')
points(266:269,c(df$AVG_EP[266:266],f.short.15.5$ll),type='l',lty=3,cex=0.6,lwd=2,pch=1,col='blue3')

# Long-term (36 month) forecast beyond the end of the sample, same ARIMA model.
pred.long = fore.arima.wge(df$AVG_EP, phi = model1.param$model.coef, theta = params.est$theta,
                            d = 1, n.ahead = 36, limits = T, lastn = F)

# Convert the short-term prediction to a data frame for ggplot.
# BUG FIX: pred.short was produced with lastn = F, so its 3 forecasts are for
# the months AFTER the end of the sample; previously they were labeled with the
# last 3 OBSERVED dates (df$DATE[264:266]). Label them with the 3 future months
# instead (df$DATE was converted to Date earlier in the script).
future.dates = seq(from = df$DATE[266], by = "month", length.out = 4)[-1]
pred.short.df = data.frame(Date = future.dates, prediction = pred.short$f, upper = pred.short$ul, lower = pred.short$ll)

colors <- c("Actual Price" = "#00AFBB", "Predicted Price" = "#FC4E07", "Prediction Limit"= "#E7B800")

# Original TS plot:
ggplot(data = df[1:266,], aes(x=DATE, y=AVG_EP))+
  geom_line(color="#00AFBB", size = 1) +
  geom_point(color="#00AFBB") +
  ggtitle("Electricity Price Over time")+xlab("Date")+ylab("Average Price") + theme_classic()

# Short-term forecast plot: recent actuals plus the forecast and its limits.
ggplot(data = df[260:266,], aes(x=DATE, y=AVG_EP, color="Actual Price"))+
  geom_line( size = 1) +
  ggtitle("Short term average Electricity price forecast")+xlab("Date")+ylab("Average Price") +
  geom_line(data = pred.short.df,aes(x=Date, y=prediction), color="#FC4E07", size=1.1) +
  geom_point(data = pred.short.df,aes(x=Date, y=prediction), color="#FC4E07", size=1.5)+
  geom_line(data = pred.short.df,aes(x=Date, y=upper), color="#E7B800", size=1.1)+
  geom_point(data = pred.short.df,aes(x=Date, y=upper), color="#E7B800", size=1.1)+
  geom_line(data = pred.short.df,aes(x=Date, y=lower), color="#E7B800", size=1.1)+ 
  geom_point(data = pred.short.df,aes(x=Date, y=lower), color="#E7B800", size=1.1)+
  scale_color_manual(values = colors) + theme_classic()

Trial - Nicole

# Sample plots of the electricity price series: realization, sample
# autocorrelations, and spectral density estimates (output pasted below).
plotts.sample.wge(df$AVG_EP)

## $xbar
## [1] 0.1177444
## 
## $autplt
##  [1] 1.0000000 0.9602995 0.9151642 0.8723585 0.8295085 0.7865279 0.7574500
##  [8] 0.7362720 0.7262652 0.7196580 0.7114933 0.7017810 0.6879721 0.6568276
## [15] 0.6238992 0.5937430 0.5625050 0.5322346 0.5184125 0.5104122 0.5059540
## [22] 0.5026686 0.4994694 0.4952331 0.4869328 0.4606747
## 
## $freq
##   [1] 0.003759398 0.007518797 0.011278195 0.015037594 0.018796992 0.022556391
##   [7] 0.026315789 0.030075188 0.033834586 0.037593985 0.041353383 0.045112782
##  [13] 0.048872180 0.052631579 0.056390977 0.060150376 0.063909774 0.067669173
##  [19] 0.071428571 0.075187970 0.078947368 0.082706767 0.086466165 0.090225564
##  [25] 0.093984962 0.097744361 0.101503759 0.105263158 0.109022556 0.112781955
##  [31] 0.116541353 0.120300752 0.124060150 0.127819549 0.131578947 0.135338346
##  [37] 0.139097744 0.142857143 0.146616541 0.150375940 0.154135338 0.157894737
##  [43] 0.161654135 0.165413534 0.169172932 0.172932331 0.176691729 0.180451128
##  [49] 0.184210526 0.187969925 0.191729323 0.195488722 0.199248120 0.203007519
##  [55] 0.206766917 0.210526316 0.214285714 0.218045113 0.221804511 0.225563910
##  [61] 0.229323308 0.233082707 0.236842105 0.240601504 0.244360902 0.248120301
##  [67] 0.251879699 0.255639098 0.259398496 0.263157895 0.266917293 0.270676692
##  [73] 0.274436090 0.278195489 0.281954887 0.285714286 0.289473684 0.293233083
##  [79] 0.296992481 0.300751880 0.304511278 0.308270677 0.312030075 0.315789474
##  [85] 0.319548872 0.323308271 0.327067669 0.330827068 0.334586466 0.338345865
##  [91] 0.342105263 0.345864662 0.349624060 0.353383459 0.357142857 0.360902256
##  [97] 0.364661654 0.368421053 0.372180451 0.375939850 0.379699248 0.383458647
## [103] 0.387218045 0.390977444 0.394736842 0.398496241 0.402255639 0.406015038
## [109] 0.409774436 0.413533835 0.417293233 0.421052632 0.424812030 0.428571429
## [115] 0.432330827 0.436090226 0.439849624 0.443609023 0.447368421 0.451127820
## [121] 0.454887218 0.458646617 0.462406015 0.466165414 0.469924812 0.473684211
## [127] 0.477443609 0.481203008 0.484962406 0.488721805 0.492481203 0.496240602
## [133] 0.500000000
## 
## $dbz
##   [1]  12.5686893  12.3173433  11.8982305  11.3114379  10.5579811   9.6409858
##   [7]   8.5676671   7.3524912   6.0218430   4.6198784   3.2132709   1.8890411
##  [13]   0.7381312  -0.1737122  -0.8283465  -1.2512906  -1.4891883  -1.5921087
##  [19]  -1.6091720  -1.5896992  -1.5821599  -1.6303232  -1.7694859  -2.0245855
##  [25]  -2.4100298  -2.9302313  -3.5799832  -4.3442938  -5.1978261  -6.1046537
##  [31]  -7.0196173  -7.8927223  -8.6769837  -9.3376650  -9.8587949 -10.2438964
##  [37] -10.5116138 -10.6894307 -10.8079797 -10.8966372 -10.9802874 -11.0772077
##  [43] -11.1981223 -11.3463448 -11.5187772 -11.7074923 -11.9016724 -12.0897493
##  [49] -12.2616000 -12.4105708 -12.5349379 -12.6383063 -12.7285740 -12.8154841
##  [55] -12.9072621 -13.0071528 -13.1107990 -13.2054874 -13.2722777 -13.2914789
##  [61] -13.2502907 -13.1493354 -13.0044255 -12.8426050 -12.6950565 -12.5905515
##  [67] -12.5514804 -12.5924245 -12.7202475 -12.9346975 -13.2288848 -13.5893678
##  [73] -13.9958765 -14.4210309 -14.8308363 -15.1871536 -15.4531999 -15.6016961
##  [79] -15.6227241 -15.5269821 -15.3423318 -15.1058464 -14.8557019 -14.6258190
##  [85] -14.4436045 -14.3297151 -14.2986430 -14.3593301 -14.5154180 -14.7649981
##  [91] -15.0998965 -15.5047232 -15.9562371 -16.4240086 -16.8735912 -17.2727527
##  [97] -17.5992900 -17.8466594 -18.0238263 -18.1492694 -18.2427639 -18.3189949
## [103] -18.3848575 -18.4401307 -18.4802628 -18.4998799 -18.4957898 -18.4686207
## [109] -18.4228343 -18.3655051 -18.3046120 -18.2475191 -18.1999932 -18.1657677
## [115] -18.1464773 -18.1417432 -18.1492234 -18.1645248 -18.1809693 -18.1893543
## [121] -18.1780207 -18.1336861 -18.0434445 -17.8979003 -17.6946141 -17.4404011
## [127] -17.1512365 -16.8496724 -16.5609037 -16.3089797 -16.1141036 -15.9911686
## [133] -15.9491882
# Observations from the sample plots above:
# slowly damping autocorrelations
# spectral density peaks at 0 (wandering behavior)
# possible seasonal peak at 1/12 = .0833333 (monthly)
# est.ar.wge(df$AVG_EP,p=15,type="burg")
# factor.wge(phi=c(rep(0,11),1))
# some evidence of s=12 in overfit, though Abs Recip should be higher

# take difference and look at acf
# Apply the (1-B) difference to remove the wandering behavior.
AVE_EP_d1 = artrans.wge(df$AVG_EP,phi.tr=1)

dev.off()
## null device 
##           1
acf(AVE_EP_d1)
plotts.sample.wge(AVE_EP_d1)
## $xbar
## [1] 0.0003509434
## 
## $autplt
##  [1]  1.000000000  0.143400409  0.007216805  0.004618813 -0.003852025
##  [6] -0.190246498 -0.124240595 -0.175915478 -0.025667511  0.058883404
## [11] -0.083321390  0.090451630  0.411172282  0.100208511 -0.065665133
## [16]  0.023298835 -0.063084353 -0.181683095 -0.140869541 -0.222865159
## [21] -0.045233335 -0.018336048 -0.015243214  0.088588981  0.368127820
## [26]  0.038711866
## 
## $freq
##   [1] 0.003773585 0.007547170 0.011320755 0.015094340 0.018867925 0.022641509
##   [7] 0.026415094 0.030188679 0.033962264 0.037735849 0.041509434 0.045283019
##  [13] 0.049056604 0.052830189 0.056603774 0.060377358 0.064150943 0.067924528
##  [19] 0.071698113 0.075471698 0.079245283 0.083018868 0.086792453 0.090566038
##  [25] 0.094339623 0.098113208 0.101886792 0.105660377 0.109433962 0.113207547
##  [31] 0.116981132 0.120754717 0.124528302 0.128301887 0.132075472 0.135849057
##  [37] 0.139622642 0.143396226 0.147169811 0.150943396 0.154716981 0.158490566
##  [43] 0.162264151 0.166037736 0.169811321 0.173584906 0.177358491 0.181132075
##  [49] 0.184905660 0.188679245 0.192452830 0.196226415 0.200000000 0.203773585
##  [55] 0.207547170 0.211320755 0.215094340 0.218867925 0.222641509 0.226415094
##  [61] 0.230188679 0.233962264 0.237735849 0.241509434 0.245283019 0.249056604
##  [67] 0.252830189 0.256603774 0.260377358 0.264150943 0.267924528 0.271698113
##  [73] 0.275471698 0.279245283 0.283018868 0.286792453 0.290566038 0.294339623
##  [79] 0.298113208 0.301886792 0.305660377 0.309433962 0.313207547 0.316981132
##  [85] 0.320754717 0.324528302 0.328301887 0.332075472 0.335849057 0.339622642
##  [91] 0.343396226 0.347169811 0.350943396 0.354716981 0.358490566 0.362264151
##  [97] 0.366037736 0.369811321 0.373584906 0.377358491 0.381132075 0.384905660
## [103] 0.388679245 0.392452830 0.396226415 0.400000000 0.403773585 0.407547170
## [109] 0.411320755 0.415094340 0.418867925 0.422641509 0.426415094 0.430188679
## [115] 0.433962264 0.437735849 0.441509434 0.445283019 0.449056604 0.452830189
## [121] 0.456603774 0.460377358 0.464150943 0.467924528 0.471698113 0.475471698
## [127] 0.479245283 0.483018868 0.486792453 0.490566038 0.494339623 0.498113208
## 
## $dbz
##   [1] -1.120417257 -1.057321119 -0.967803862 -0.871366652 -0.788722954
##   [6] -0.737328900 -0.727027932 -0.755566598 -0.803936769 -0.832929506
##  [11] -0.785111771 -0.598357730 -0.231544277  0.311855412  0.982548849
##  [16]  1.707474033  2.415388266  3.050760530  3.575864464  3.967301805
##  [21]  4.211630518  4.301984419  4.236042901  4.015232297  3.645031667
##  [26]  3.136374621  2.508200347  1.791000750  1.030379668  0.287870305
##  [31] -0.365621850 -0.866725662 -1.180883021 -1.312713072 -1.296521583
##  [36] -1.176282765 -0.990656767 -0.768813704 -0.533255163 -0.303994930
##  [41] -0.100901678  0.056300912  0.149446791  0.163653216  0.088751394
##  [46] -0.079643613 -0.339188280 -0.679708681 -1.081799949 -1.515313688
##  [51] -1.938507184 -2.299460174 -2.541922372 -2.616659878 -2.495489134
##  [56] -2.180831391 -1.704558815 -1.117208409 -0.474653659  0.171621987
##  [61]  0.779050466  1.314716300  1.754229965  2.080053396  2.279937105
##  [66]  2.345730221  2.272647177  2.059027183  1.706645504  1.221699857
##  [71]  0.616681931 -0.086592792 -0.852639464 -1.627201727 -2.334032236
##  [76] -2.879813628 -3.174836472 -3.166982246 -2.866661700 -2.339837877
##  [81] -1.676706490 -0.962703359 -0.265241733  0.367438140  0.902603911
##  [86]  1.318781847  1.602349308  1.745160510  1.743061309  1.595140367
##  [91]  1.303644764  0.874585701  0.319147750 -0.343956826 -1.085220560
##  [96] -1.860869906 -2.611439622 -3.266625145 -3.760214129 -4.051758377
## [101] -4.141254015 -4.064681630 -3.874998452 -3.623685594 -3.351483551
## [106] -3.086961339 -2.848371047 -2.645761586 -2.482366783 -2.355417603
## [111] -2.256930428 -2.175055600 -2.096363575 -2.008973862 -1.905767886
## [116] -1.786446623 -1.657390290 -1.529195151 -1.412773358 -1.315320336
## [121] -1.237242196 -1.170777151 -1.100883924 -1.008811114 -0.878045087
## [126] -0.700876001 -0.482625607 -0.241275313 -0.002837887  0.204940314
## [131]  0.357919662  0.438844743
# characteristic seasonal component for 12 months (ACF large at multiples of 12)
# Apply the seasonal (1-B^12) difference on top of the (1-B) difference.
AVE_EP_d1_s12 = artrans.wge(AVE_EP_d1,phi.tr=c(rep(0,11),1))
plotts.sample.wge(AVE_EP_d1_s12)
## $xbar
## [1] 9.881423e-05
## 
## $autplt
##  [1]  1.000000000  0.061881369  0.149355139 -0.067465091  0.065996069
##  [6]  0.005480036  0.014532954  0.081574193 -0.007551414  0.110805568
## [11] -0.133491536 -0.005427407 -0.467952630  0.044216355 -0.078037716
## [16]  0.019932518 -0.072550894  0.058968103 -0.061592872 -0.153638588
## [21] -0.019250759 -0.075084909  0.074778865 -0.011612515 -0.027345280
## [26] -0.052413426
## 
## $freq
##   [1] 0.003952569 0.007905138 0.011857708 0.015810277 0.019762846 0.023715415
##   [7] 0.027667984 0.031620553 0.035573123 0.039525692 0.043478261 0.047430830
##  [13] 0.051383399 0.055335968 0.059288538 0.063241107 0.067193676 0.071146245
##  [19] 0.075098814 0.079051383 0.083003953 0.086956522 0.090909091 0.094861660
##  [25] 0.098814229 0.102766798 0.106719368 0.110671937 0.114624506 0.118577075
##  [31] 0.122529644 0.126482213 0.130434783 0.134387352 0.138339921 0.142292490
##  [37] 0.146245059 0.150197628 0.154150198 0.158102767 0.162055336 0.166007905
##  [43] 0.169960474 0.173913043 0.177865613 0.181818182 0.185770751 0.189723320
##  [49] 0.193675889 0.197628458 0.201581028 0.205533597 0.209486166 0.213438735
##  [55] 0.217391304 0.221343874 0.225296443 0.229249012 0.233201581 0.237154150
##  [61] 0.241106719 0.245059289 0.249011858 0.252964427 0.256916996 0.260869565
##  [67] 0.264822134 0.268774704 0.272727273 0.276679842 0.280632411 0.284584980
##  [73] 0.288537549 0.292490119 0.296442688 0.300395257 0.304347826 0.308300395
##  [79] 0.312252964 0.316205534 0.320158103 0.324110672 0.328063241 0.332015810
##  [85] 0.335968379 0.339920949 0.343873518 0.347826087 0.351778656 0.355731225
##  [91] 0.359683794 0.363636364 0.367588933 0.371541502 0.375494071 0.379446640
##  [97] 0.383399209 0.387351779 0.391304348 0.395256917 0.399209486 0.403162055
## [103] 0.407114625 0.411067194 0.415019763 0.418972332 0.422924901 0.426877470
## [109] 0.430830040 0.434782609 0.438735178 0.442687747 0.446640316 0.450592885
## [115] 0.454545455 0.458498024 0.462450593 0.466403162 0.470355731 0.474308300
## [121] 0.478260870 0.482213439 0.486166008 0.490118577 0.494071146 0.498023715
## 
## $dbz
##   [1]  0.256286821  0.546568420  0.952837510  1.397969554  1.818517774
##   [6]  2.171517399  2.431835518  2.586947169  2.632367320  2.568479159
##  [11]  2.398581638  2.127839581  1.762935993  1.312399163  0.787751723
##  [16]  0.205768851 -0.407883853 -1.013892123 -1.555395592 -1.960157417
##  [21] -2.155909549 -2.098691610 -1.796550260 -1.306688445 -0.708789499
##  [26] -0.077950661  0.528174551  1.070090033  1.522194178  1.868123676
##  [31]  2.097279981  2.202672411  2.179826470  2.026544105  1.743464190
##  [36]  1.335558727  0.814865432  0.204765970 -0.454441082 -1.100884316
##  [41] -1.653410996 -2.029142815 -2.175656259 -2.096353000 -1.844515099
##  [46] -1.492349427 -1.102864557 -0.718933511 -0.365235922 -0.054692828
##  [51]  0.205443392  0.409412480  0.550750500  0.621643480  0.613302099
##  [56]  0.516918667  0.324973300  0.032925741 -0.358383402 -0.839054363
##  [61] -1.384612777 -1.949022340 -2.459779620 -2.824432126 -2.959127196
##  [66] -2.831252082 -2.480174531 -1.993584247 -1.465174899 -0.968259754
##  [71] -0.550901214 -0.241471621 -0.055723628 -0.002093647 -0.084856825
##  [76] -0.305480057 -0.662376918 -1.148854163 -1.748558382 -2.427506214
##  [81] -3.123025247 -3.735547164 -4.140359499 -4.236959997 -4.011963346
##  [86] -3.548246410 -2.968531294 -2.377735302 -1.842765671 -1.397726628
##  [91] -1.055164236 -0.815182020 -0.671416274 -0.614833756 -0.636258901
##  [96] -0.728051343 -0.884911360 -1.103495838 -1.380378027 -1.707835733
## [101] -2.067067325 -2.419280736 -2.698181090 -2.813739011 -2.680178203
## [106] -2.262694762 -1.603612522 -0.798765477  0.049284527  0.860873032
## [111]  1.584517262  2.190512434  2.662960956  2.993768597  3.178835139
## [116]  3.215817299  3.102884820  2.838125422  2.419508272  1.845626764
## [121]  1.117960269  0.246390726 -0.738560101 -1.761896007 -2.670695542
## [126] -3.229542302
dev.off()
## null device 
##           1
acf(AVE_EP_d1_s12)
# doesn't look like white noise
# aic5.wge(AVE_EP_d1_s12,p=0:15) #13,0 12,2 12,0
# Fit an AR(12) to the twice-differenced series (top AIC5 candidates noted above).
EP_est = est.arma.wge(AVE_EP_d1_s12,p=12)
##   
##   
## Coefficients of AR polynomial:  
## 0.0678 0.0697 -0.0351 0.0627 0.0391 0.0130 0.0534 0.0131 0.0759 -0.0687 -0.0137 -0.4792 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.8492B+0.9205B^2   -1.0044+-0.2784i      0.9594       0.4570
## 1-1.8551B+0.9104B^2    1.0189+-0.2456i      0.9541       0.0377
## 1-1.3412B+0.9077B^2    0.7388+-0.7456i      0.9527       0.1257
## 1-0.4737B+0.8816B^2    0.2687+-1.0306i      0.9389       0.2094
## 1+1.2920B+0.8465B^2   -0.7631+-0.7739i      0.9201       0.3739
## 1+0.4610B+0.8443B^2   -0.2730+-1.0535i      0.9188       0.2904
##   
## 
# Evaluate the ARUMA(12,1,0) s=12 model on the last 3 and last 36 observations
# (lastn=TRUE forecasts the held-back tail of the realization).
# short 3 month ARUMA(12,1,0) s=12
# roll.win.ase.wge(df$AVG_EP,horizon=3,s=12,d=1,phis=EP_est$phi,thetas=EP_est$theta)
#   2.8495e-05 -> 0.0000285
f.short = fore.arima.wge(df$AVG_EP,phi=EP_est$phi,theta=EP_est$theta,s=12,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short$f)^2)
ASE.short #3.595854e-05 -> 0.000036
## [1] 3.595854e-05
# long 36 month ARUMA(12,1,0) s=12
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est$phi,thetas=EP_est$theta)
# 0.0004547268
f.long = fore.arima.wge(df$AVG_EP,phi=EP_est$phi,theta=EP_est$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long$f)^2)
ASE.long 
## [1] 0.001223659
#0.001223659

# Second AIC5 candidate: AR(13) on the twice-differenced series.
# try other AIC5 options with long forecast: ARUMA(13,1,0) s=12 
EP_est_13 = est.arma.wge(AVE_EP_d1_s12,p=13)
##   
##   
## Coefficients of AR polynomial:  
## 0.1128 0.0693 -0.0277 0.0568 0.0365 0.0050 0.0535 0.0144 0.0724 -0.0676 -0.0225 -0.4814 0.0981 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.8636B+0.9335B^2   -0.9982+-0.2736i      0.9662       0.4574
## 1-1.3203B+0.8982B^2    0.7350+-0.7571i      0.9477       0.1273
## 1-1.8307B+0.8877B^2    1.0311+-0.2516i      0.9422       0.0381
## 1-0.4434B+0.8830B^2    0.2510+-1.0341i      0.9397       0.2121
## 1+1.3242B+0.8664B^2   -0.7642+-0.7551i      0.9308       0.3760
## 1+0.4947B+0.8570B^2   -0.2886+-1.0409i      0.9257       0.2930
## 1-0.2010B              4.9750               0.2010       0.0000
##   
## 
# Long-horizon (36 month) holdout ASE for the ARUMA(13,1,0) s=12 candidate.
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est_13$phi,thetas=EP_est_13$theta)
#   0.0004697769
f = fore.arima.wge(df$AVG_EP,phi=EP_est_13$phi,theta=EP_est_13$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
mean((df$AVG_EP[(266-36+1):266]-f$f)^2)
## [1] 0.001237814
#0.001237814

# Third AIC5 candidate: ARMA(12,2) on the twice-differenced series.
# try 12,2 ARUMA(12,1,2) s=12
EP_est_12_2 = est.arma.wge(AVE_EP_d1_s12,p=12,q=2)
##   
##   
## Coefficients of AR polynomial:  
## -0.1173 -0.1001 0.0061 0.0723 0.0347 0.0251 0.0581 0.0310 0.0890 -0.0515 -0.0205 -0.5046 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1+1.8498B+0.9224B^2   -1.0027+-0.2806i      0.9604       0.4566
## 1-0.4248B+0.9030B^2    0.2353+-1.0257i      0.9502       0.2141
## 1-1.3025B+0.8935B^2    0.7288+-0.7668i      0.9453       0.1290
## 1+0.4967B+0.8923B^2   -0.2783+-1.0214i      0.9446       0.2923
## 1+1.3066B+0.8760B^2   -0.7458+-0.7651i      0.9359       0.3730
## 1-1.8084B+0.8674B^2    1.0424+-0.2573i      0.9313       0.0385
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## -0.2340 -0.2435 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1+0.2340B+0.2435B^2   -0.4806+-1.9687i      0.4935       0.2881
##   
## 
# Long-horizon (36 month) holdout ASE for the ARUMA(12,1,2) s=12 candidate.
# roll.win.ase.wge(df$AVG_EP,horizon=36,s=12,d=1,phis=EP_est_12_2$phi,thetas=EP_est_12_2$theta)
#  0.0004835913
f = fore.arima.wge(df$AVG_EP,phi=EP_est_12_2$phi,theta=EP_est_12_2$theta,s=12,d=1,n.ahead=36,lastn=TRUE)
mean((df$AVG_EP[(266-36+1):266]-f$f)^2) 
## [1] 0.001275155
# 0.001275155 (NOTE(review): an earlier run recorded 0.00128316 here)

# None of these beat Nick's ARUMA(13,1,5) s=12

# Try another model entirely - take 1st diff then fit an ARMA
# (recomputes AVE_EP_d1 so this section is self-contained)
AVE_EP_d1 = artrans.wge(df$AVG_EP,phi.tr=1)
# aic5.wge(AVE_EP_d1,p=0:15,q=0:5) #15,5
# aic5.wge(AVE_EP_d1,p=10:20,q=0:5) #15,5
# aic5.wge(AVE_EP_d1,p=0:15,q=0:5,type="bic") #12,0 13,1

# ARIMA(15,1,5)
EP_est_15_5 = est.arma.wge(AVE_EP_d1,p=15,q=5)
##   
##   
## Coefficients of AR polynomial:  
## -0.1148 0.4747 0.2752 -0.1471 -0.7224 0.0089 -0.0739 0.0415 0.1338 -0.2228 -0.1037 0.2903 0.1106 -0.1338 -0.1832 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.7289B+0.9959B^2    0.8680+-0.5007i      0.9980       0.0833
## 1+0.9979B             -1.0021               0.9979       0.5000
## 1+0.9941B+0.9922B^2   -0.5009+-0.8700i      0.9961       0.3331
## 1-0.0030B+0.8512B^2    0.0018+-1.0839i      0.9226       0.2497
## 1-1.0089B+0.7780B^2    0.6484+-0.9300i      0.8820       0.1531
## 1-1.6665B+0.7403B^2    1.1255+-0.2897i      0.8604       0.0401
## 1+1.5748B+0.7012B^2   -1.1230+-0.4063i      0.8374       0.4447
## 1+0.9554B+0.5405B^2   -0.8838+-1.0340i      0.7352       0.3626
##   
##   
##   
##   
## Coefficients of MA polynomial:  
## -0.2191 0.4727 0.4063 -0.2067 -0.8269 
## 
##                               MA FACTOR TABLE 
## Factor                 Roots                Abs Recip    System Freq 
## 1+0.9732B             -1.0275               0.9732       0.5000
## 1-1.6903B+0.9350B^2    0.9039+-0.5025i      0.9670       0.0807
## 1+0.9362B+0.9087B^2   -0.5151+-0.9139i      0.9532       0.3317
##   
## 
# Evaluate the ARIMA(15,1,5) model on the last 3 and last 36 observations.
# short 3 month
# roll.win.ase.wge(df$AVG_EP,horizon=3,d=1,phis=EP_est_15_5$phi,thetas=EP_est_15_5$theta)
#   3.695999e-05
dev.off()
## null device 
##           1
f.short.15.5 = fore.arima.wge(df$AVG_EP,phi=EP_est_15_5$phi,theta=EP_est_15_5$theta,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short.15.5$f)^2)
ASE.short # 4.567978e-05 (printed below; an earlier run recorded 4.216765e-05)
## [1] 4.567978e-05
# 36 month ARIMA(15,1,5)
# roll.win.ase.wge(df$AVG_EP,horizon=36,d=1,phis=EP_est_15_5$phi,thetas=EP_est_15_5$theta)
# 0.0002368869
f.long.15.5 = fore.arima.wge(df$AVG_EP,phi=EP_est_15_5$phi,theta=EP_est_15_5$theta,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long.15.5$f)^2)
ASE.long 
## [1] 0.0001089089
# 0.0001089089

# BIC candidate: AR(12) on the first-differenced series — ARIMA(12,1,0).
# ARIMA(12,1,0)
EP_est_12_0 = est.arma.wge(AVE_EP_d1,p=12)
##   
##   
## Coefficients of AR polynomial:  
## 0.1026 0.0192 -0.0458 0.0443 -0.1428 -0.0133 -0.0959 -0.0076 0.0919 -0.1504 0.0576 0.3936 
## 
##                            AR Factor Table 
## Factor                 Roots                Abs Recip    System Freq 
## 1-1.6643B+0.9156B^2    0.9089+-0.5159i      0.9568       0.0822
## 1-0.0267B+0.9047B^2    0.0148+-1.0513i      0.9511       0.2478
## 1+0.9674B+0.8990B^2   -0.5381+-0.9071i      0.9481       0.3352
## 1-0.9514B+0.8435B^2    0.5640+-0.9314i      0.9184       0.1633
## 1-0.9027B              1.1078               0.9027       0.0000
## 1+0.8985B             -1.1130               0.8985       0.5000
## 1+1.5766B+0.7727B^2   -1.0202+-0.5033i      0.8790       0.4271
##   
## 
# Evaluate the ARIMA(12,1,0) model on the last 3 and last 36 observations.
# short 3 month
# roll.win.ase.wge(df$AVG_EP,horizon=3,d=1,phis=EP_est_12_0$phi)
#  2.661653e-05
f.short.12.0 = fore.arima.wge(df$AVG_EP,phi=EP_est_12_0$phi,d=1,n.ahead=3,lastn=TRUE)
ASE.short = mean((df$AVG_EP[264:266]-f.short.12.0$f)^2)
ASE.short # 8.029078e-05 -> 0.0000803
## [1] 8.029078e-05
# 36 month ARIMA(12,1,0)
# roll.win.ase.wge(df$AVG_EP,horizon=36,d=1,phis=EP_est_12_0$phi)
# 0.0002323361
f.long.12.0 = fore.arima.wge(df$AVG_EP,phi=EP_est_12_0$phi,d=1,n.ahead=36,lastn=TRUE)
ASE.long = mean((df$AVG_EP[(266-36+1):266]-f.long.12.0$f)^2)
ASE.long 
## [1] 0.0001502498
# 0.0001502498

Trial - Ricco

plotts.sample.wge(df$AVG_EP)

## over-fit like our lives depend on it
overfit_ar_30_ricco=est.ar.wge(df$AVG_EP,p=50, type='burg')

# Clearly a 1-B in there. try removing it and over-fitting again. 
AVG_E_i1_ricco = artrans.wge(df$AVG_EP, c(1))
overfit_ar_30_i1_ricco=est.ar.wge(AVG_E_i1_ricco,p=20, type='burg')
# Build the near-nonstationary factor product identified in the overfit table:
# nonstationary_lambda = mult.wge(fac1=c(1-1.7276B+0.9942B^2), c(1+0.9951B), c(1+0.0130B+0.9896B^2), c(1+0.9916B+0.9889B^2))
nonstationary_lambda_ricco = mult.wge(c(1.7276,-0.9942), c(-0.9951), c(-0.0130,-0.9896), c(-0.9916,-0.9889))
# BUG FIX: the three references below used the undefined name
# `nonstationary_lambda`; the object created above is `nonstationary_lambda_ricco`.
nonstationary_lambda_ricco$model.coef
AVG_E_lambda_corrected_ricco = artrans.wge(df$AVG_EP, c(nonstationary_lambda_ricco$model.coef))
aic_lambda_corrected_ricco = aic.wge(AVG_E_lambda_corrected_ricco, p=0:12, q=0:5, type="bic")
forecast_arma = fore.aruma.wge(df$AVG_EP, phi=aic_lambda_corrected_ricco$phi, theta=aic_lambda_corrected_ricco$theta, d=1, n.ahead = 36,limits = FALSE, plot=TRUE, lastn = TRUE, lambda =c(nonstationary_lambda_ricco$model.coef))

forecast_arma_ricco = fore.aruma.wge(df$AVG_EP, phi=overfit_ar_30_i1_ricco$phi,n.ahead = 36,limits = FALSE, plot=TRUE, lastn = TRUE)
## ^^^ YUCK! that looks terrible. let's try a different approach. 

# try finding best long non-stationary estimates Seasonality by rolling window rmse (this has O(n^2) runtime complexity)
# Grid-search s in 0..10 and d in 0..2, keeping the pair with the lowest
# 36-step rolling-window RMSE (no AR/MA terms: phi/theta default to 0).
# NOTE(review): roll.win.rmse.wge_ricco is defined in the Appendix at the
# bottom of this script — that definition must be evaluated before this loop.
best_s_ricco = 0
best_d_ricco = 0
max_s_ricco = 10
max_d_ricco = 2
best_rsme_seasonality_search_and_wandering_search = 10000.0
for (s_try in 0:max_s_ricco) {
  for (d_try in 0:max_d_ricco) {
    rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, d = d_try, s=s_try, horizon = 36, plot=FALSE)
    if(rolling.res.long_ricco$rwRMSE < best_rsme_seasonality_search_and_wandering_search) {
      best_rsme_seasonality_search_and_wandering_search = rolling.res.long_ricco$rwRMSE
      best_s_ricco = s_try
      best_d_ricco = d_try
    }
  }
}
print(paste("best long rolling window s was:", best_s_ricco))
print(paste("best long rolling window d was:", best_d_ricco))

# best s=0 and d=1 
AVG_E_i1_ricco = artrans.wge(df$AVG_EP, c(1))

# now find best p and q with a rolling window wide grid search
# Cost = mean of the 36-step rolling-window RMSE and the last-36 holdout ASE.
best_p_wide_ricco = 0
best_q_wide_ricco = 0
best_cost_wide_ricco = 10000
best_rsme_s0_d1_p_search_wide = 10000.0
best_last36_ase_wide = 10000.0
q_seq_wide_ricco = seq(1, 11, 5)
p_seq_wide_ricco = seq(0, 30, 10)
for(q_try in q_seq_wide_ricco) {
  for (p_try in p_seq_wide_ricco) {
    was_error = FALSE
    tryCatch(expr = {
      print(paste("trying p: ", p_try, "and q: ", q_try))
      capture.output(est_p_q_search_ricco <- est.arma.wge(AVG_E_i1_ricco, p=p_try, q=q_try), file='NUL')    
      rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0,  horizon = 36, plot=FALSE)
      # BUG FIX: forecast with the estimated MA part too (theta was dropped before).
      capture.output(f_ricco <- fore.aruma.wge(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, n.ahead=36, lastn=TRUE, plot=FALSE))
      # BUG FIX: score the forecast just computed (f_ricco), not the stale
      # global `f` left over from an earlier section of the script.
      prediction_ase_ricco = mean((df$AVG_EP[(266-35):266]-f_ricco$f)^2) 
      # weight last 36 higher because they are the forecasts we should care about most
      cost_ricco = (rolling.res.long_ricco$rwRMSE + prediction_ase_ricco)/2
      if(cost_ricco < best_cost_wide_ricco) {
        best_cost_wide_ricco = cost_ricco
        best_rsme_s0_d1_p_search_wide = rolling.res.long_ricco$rwRMSE
        best_last36_ase_wide = prediction_ase_ricco
        best_p_wide_ricco = p_try
        best_q_wide_ricco = q_try
      }
    },
    error = function(e) {
      print(paste("an error occured for p: ", p_try, "and q: ", q_try))
      # BUG FIX: `was_error = TRUE` only assigned inside the handler's own
      # environment; use <<- so the loop body can see the flag.
      was_error <<- TRUE
    })
    if(was_error)
      next
  }
}
print(paste("best long + immediate rolling window p with wide search was:", best_p_wide_ricco))
print(paste("best long + immediate rolling window q with wide search was:", best_q_wide_ricco))

# now find best p and q with a rolling window narrow grid search
# Refine around the wide-search winner: q in [best_q-2, best_q+1],
# p in [best_p-4, best_p+2], clamped at 1.
best_p_narrow_ricco = 0
best_q_narrow_ricco = 0
best_cost_narrow_ricco = 10000
best_rsme_s0_d1_p_search_narrow = 10000.0
best_last36_ase_narrow = 10000.0
min_q_narrow_ricco = max(c(1, best_q_wide_ricco-2))
min_p_narrow_ricco = max(c(1, best_p_wide_ricco-4))
q_seq_narrow_ricco = seq(min_q_narrow_ricco, best_q_wide_ricco+1, 1)
p_seq_narrow_ricco = seq(min_p_narrow_ricco, best_p_wide_ricco+2, 1)

best_arma_estimates = NULL
for(q_try in q_seq_narrow_ricco) {
  for (p_try in p_seq_narrow_ricco) {
    was_error = FALSE
    tryCatch(expr = {
      print(paste("trying p: ", p_try, "and q: ", q_try))
      capture.output(est_p_q_search_ricco <- est.arma.wge(AVG_E_i1_ricco, p=p_try, q=q_try), file='NUL')    
      rolling.res.long_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0,  horizon = 36, plot=FALSE)
      # BUG FIX: forecast with the estimated MA part too (theta was dropped before).
      capture.output(f_ricco <- fore.aruma.wge(df$AVG_EP, phi=est_p_q_search_ricco$phi, theta=est_p_q_search_ricco$theta, d=1, s=0, n.ahead=36, lastn=TRUE, plot=FALSE))
      # BUG FIX: score the forecast just computed (f_ricco), not the stale
      # global `f` left over from an earlier section of the script.
      prediction_ase_ricco = mean((df$AVG_EP[(266-35):266]-f_ricco$f)^2) 
      # weight last 36 higher because they are the forecasts we should care about most
      cost_ricco = (rolling.res.long_ricco$rwRMSE + prediction_ase_ricco)/2
      if(cost_ricco < best_cost_narrow_ricco) {
        best_cost_narrow_ricco = cost_ricco
        best_rsme_s0_d1_p_search_narrow = rolling.res.long_ricco$rwRMSE
        best_last36_ase_narrow = prediction_ase_ricco
        best_p_narrow_ricco = p_try
        best_q_narrow_ricco = q_try
        best_arma_estimates = est_p_q_search_ricco
      }
    },
    error = function(e) {
      print(paste("an error occured for p: ", p_try, "and q: ", q_try))
      # BUG FIX: use <<- so the flag set in the handler is visible to the loop.
      was_error <<- TRUE
    })
    if(was_error)
      next
  }
}
print(paste("best long + immediate rolling window p with narrow search was:", best_p_narrow_ricco))
print(paste("best long + immediate rolling window q with narrow search was:", best_q_narrow_ricco))

best_rsme_s0_d1_p_search_narrow^2
best_last36_ase_narrow
length(df$AVG_EP)

# best P was p=30 q=11, WOW! A huge and complex model BUT it was rolling window RSME verified!
#  I wonder how the ARIMA(30,1,11) would do on future, unseen data. it’s probably just REALLY good at fitting the signal we have, even if we rolling window verify. true cross-validation would be better, if we had the data.
# BUG FIX: this print referenced undefined names (best_rsme_s0_d1_p_search,
# best_p_ricco, best_q_ricco); use the narrow-search results computed above.
print(paste("best long rolling (36) ase was:", best_rsme_s0_d1_p_search_narrow^2, "for p:", best_p_narrow_ricco, "for q:", best_q_narrow_ricco, "and d = 1"))

# last 3 ASE
f_ricco_3 = fore.aruma.wge(df$AVG_EP, phi=best_arma_estimates$phi, theta = best_arma_estimates$theta, d=1, s=0, n.ahead=3, lastn=TRUE)
# BUG FIX: (266-3):266 is FOUR observations (263..266) compared against three
# forecasts, which recycles; use the last three points 264..266.
prediction_ase_ricco_3 = mean((df$AVG_EP[(266-3+1):266]-f_ricco_3$f)^2) 
print(paste("Last 3 ASE was s:", prediction_ase_ricco_3, "for p:", best_p_narrow_ricco, "q:" , best_q_narrow_ricco, "and d = 1"))

# last 36 ASE
f_ricco_36 = fore.aruma.wge(df$AVG_EP, phi=best_arma_estimates$phi, theta = best_arma_estimates$theta, d=1, s=0, n.ahead=36, lastn=TRUE)
prediction_ase_ricco_36 = mean((df$AVG_EP[(266-35):266]-f_ricco_36$f)^2) 
print(paste("Last 36 ASE was s:", prediction_ase_ricco_36, "for p:", best_p_narrow_ricco, "q:" , best_q_narrow_ricco, "and d = 1"))
rolling.res.short_ricco = roll.win.rmse.wge_ricco(df$AVG_EP, phi=best_arma_estimates$phi, theta=best_arma_estimates$theta, d=1, s=0,  horizon = 3, plot=FALSE)

# Rolling 
rolling.res.short_ricco$rwRMSE^2
best_arma_estimates_SSE = sum(best_arma_estimates$res^2)
# AIC = n*log(SSE/n) + 2*(p+q+1); use the selected orders instead of the
# previously hard-coded 30 and 11 so this stays correct if the search changes.
aic = 266*log(best_arma_estimates_SSE/266) +2*(best_p_narrow_ricco+best_q_narrow_ricco+1)
aic

# residuals look VERY white, maybe a slight amount of heteroskedasticity and trend at the very end? 
# BUG FIX: diagnose the residuals of the BEST model's forecast (f_ricco_36),
# not the stale `f_ricco` left from the last grid-search iteration.
plotts.sample.wge(f_ricco_36$resid)

# Ljung-Box fails to reject, i.e. it is consistent with the residuals being
# white noise — which agrees with the plots above.
ljung.wge(f_ricco_36$resid)

best_arma_estimates

Appendix: A modified version of the current roll.win.rmse.wge on GitHub (it uses fore.aruma.wge from CRAN and does not print). To use the latest tswge, replace fore.aruma.wge with fore.arima.wge.

#Rolling Window RMSE Function
# series is the array of the series
# horizon is how far you want to predict into the future
# d is the order of the differencing: (1-B)^d
# s is the order of the seasonality: (1-B^s)
# phi = coefficients of the stationary AR term
# theta = coefficients of the invertible MA term

# It simply takes the given horizon and the model in the form of s, d, phis and 
# thetas and figures out how many windows it can create in the data (series) and then calculates the ASE for each window.  
# The output is the average of all the ASEs from each individual window.  

# Rolling-window RMSE for an ARMA (s = 0 and d = 0) or ARUMA model -- a quiet
# (non-printing, non-plotting) variant of tswge's roll.win.rmse.wge.
#
# series  : numeric vector, the realization being evaluated
# horizon : how many steps past each training window to forecast
# s       : seasonal order, the (1 - B^s) factor
# d       : differencing order, the (1 - B)^d factor
# phi     : AR coefficients (scalar 0 means "no AR part")
# theta   : MA coefficients (scalar 0 means "no MA part")
# plot    : forwarded to fore.aruma.wge in the s/d branch only
#
# Slides a fixed-size training window across `series`, forecasts `horizon`
# steps ahead from each window, records each window's forecast RMSE, and
# invisibly returns the mean of those per-window RMSEs plus the settings used.
roll.win.rmse.wge_ricco = function(series, horizon = 2, s = 0, d = 0, phi = 0, theta = 0, plot=FALSE)
{

  #DEFINE fore.arma.wge2
  # Local copy of tswge's fore.arma.wge with plotting/printing stripped out and
  # one extension: `xbar2` optionally overrides the process mean used in the
  # forecast recursion (when NULL, the mean of the supplied window is used).

  fore.arma.wge2=function(x,phi=0,theta=0,n.ahead=5,lastn=FALSE, plot=FALSE,alpha=.05,limits=TRUE, xbar2 = NULL)
  {
    # lastn=TRUE indicates that the last n data values are to be forecast
    # lastn=FALSE (default) indicates we want forecasts for n values beyond the end of the realization
    n=length(x)
    # An all-zero coefficient vector is treated as "term absent" (order 0).
    p=length(phi)
    if(sum(phi^2)==0) {p=0}
    q=length(theta)
    if(sum(theta^2)==0) {q=0}
    #resid=rep(0,n)
    npn.ahead=n+n.ahead
    xhat=rep(0,npn.ahead)
    # Mean used in the forecast recursion: caller-supplied, else window mean.
    if(is.null(xbar2))
    {
      xbar=mean(x)
    }
    else
    {
      xbar = xbar2
    }
    
    # const = 1 - sum(phi); xbar*const is the constant (mean-correction) term
    # added at every forecast step of the AR difference equation.
    const=1
    if (p > 0) {for(jp in 1:p) {const=const-phi[jp]}}
    #
    #
    # Calculate Box-Jenkins Forecasts
    #
    #
    #Calculating Residuals
    #
    # Residuals come from backcasting (50 backcast steps) rather than the
    # in-line recursion kept commented out below.
    resid=backcast.wge(x,phi,theta,n.back=50)
    #
    #
    #maconst=const*xbar
    #p1=max(p+1,q+1)
    #for (i in p1:n) {resid[i]=x[i]
    #   if ( p > 0) {for (jp in 1:p) {resid[i]=resid[i]-phi[jp]*x[i-jp]}}
    #   if (q > 0) {for (jq in 1:q) {resid[i]=resid[i]+theta[jq]*resid[i-jq]}}
    #                   resid[i]=resid[i]-maconst}
    #
    # Calculating Forecasts
    #
    #
    npn.ahead=n+n.ahead
    xhat=rep(0,npn.ahead)
    # mm is the forecast origin: end of the series, or n - n.ahead when the
    # final n.ahead points are being re-forecast for evaluation (lastn=TRUE).
    mm=n
    #
    #lastn = TRUE
    #
    if(lastn==TRUE) {mm=n-n.ahead}
    #
    #
    # Seed xhat with the observed values, then run the ARMA difference
    # equation forward: AR part on prior xhat values, MA part on backcast
    # residuals (contributes only while h <= q), plus the constant term.
    for (i in 1:mm) {xhat[i]=x[i]}
    for (h in 1:n.ahead) {
      if (p > 0) {for (jp in 1:p) {xhat[mm+h]=xhat[mm+h]+phi[jp]*xhat[mm+h-jp]}}
      if ((h<=q)&(h>0)) {for(jq in h:q) {xhat[mm+h]=xhat[mm+h]-theta[jq]*resid[mm+h-jq]}}
      xhat[mm+h]=xhat[mm+h]+xbar*const}
    #
    #
    #   Calculate psi weights for forecasts limits
    #
    #
    xi=psi.weights.wge(phi,theta,lag.max=n.ahead)
    #
    #
    #
    #Setting up for plots
    nap1=n.ahead+1
    fplot=rep(0,nap1)
    maxh=mm+n.ahead
    llplot=rep(0,nap1)
    ulplot=rep(0,nap1)
    f=rep(0,nap1)
    ll=rep(0,nap1)
    ul=rep(0,nap1)
    wnv=0
    xisq=rep(0,n.ahead)
    se=rep(0,n.ahead)
    se0=1
    # White-noise variance estimate: mean squared residual.
    for (i in 1:n) {wnv=wnv+resid[i]**2}
    wnv=wnv/n
    # Cumulative sums of squared psi weights give the h-step forecast error
    # variance multipliers: se[h] = sqrt(wnv * xisq[h]).
    # NOTE(review): 2:n.ahead counts down when n.ahead == 1, so a 1-step call
    # would misbehave here; callers in this file always use horizon >= 2.
    xisq[1]=1
    for (i in 2:n.ahead) {xisq[i]=xisq[i-1]+xi[i-1]^2}
    for (i in 1:n.ahead) {se[i]=sqrt(wnv*xisq[i])}
    fplot[1]=x[mm]
    for (i in 1:n.ahead) {fplot[i+1]=xhat[mm+i]}
    ulplot[1]=x[mm]
    #for (i in 1:n.ahead) { ulplot[i+1]=fplot[i+1]+1.96*se[i]}
    # (1 - alpha) forecast limits; -qnorm(alpha/2) is the positive z value.
    for (i in 1:n.ahead) { ulplot[i+1]=fplot[i+1]-qnorm(alpha/2)*se[i]}
    llplot[1]=x[mm]
    #for (i in 1:n.ahead) { llplot[i+1]=fplot[i+1]-1.96*se[i]}
    for (i in 1:n.ahead) { llplot[i+1]=fplot[i+1]+qnorm(alpha/2)*se[i]}
    #
    # Axis ranges for the (intentionally disabled) plot code below.
    if(limits==FALSE) {
      if(lastn==TRUE) {max=max(x,xhat[1:n])
      min=min(x,xhat[1:n])}
      else            {max=max(x,xhat)
      min=min(x,xhat)}}
    if(limits==TRUE) {min=min(x,llplot)
    max=max(x,ulplot)}
    #numrows <- 1
    #numcols <- 1
    timelab <- 'Time'
    valuelab <- ''
    #fig.width <- 5
    #fig.height <- 2.5
    cex.labs <- c(.8,.7,.8)
    #par(mfrow=c(numrows,numcols),mar=c(6,2,3,1))
    t<-1:n;
    np1=n+1
    np.ahead=mm+n.ahead
    tf<-mm:np.ahead
    # Plotting is deliberately commented out in this quiet variant.
    #if (plot=='TRUE') {
      #fig.width <- 5
      #fig.height <- 2.5
      #cex.labs <- c(1.2,1.2,1.2)
      #par(mfrow=c(numrows,numcols),mar=c(9,4,3,2))
      #plot(t,x,type='o',xaxt='n',yaxt='n',cex=.8,pch=16,cex.lab=1,cex.axis=1,lwd=1,xlab='',ylab='',xlim=c(1,maxh),ylim=c(min,max),col=1)
      #axis(side=1,cex.axis=1.1,mgp=c(3,0.15,0),tcl=-.3);
      #axis(side=2,las=1,cex.axis=1.1,mgp=c(3,.4,0),tcl=-.3)
      #abline=mean(x)
      #mtext(side=c(1,2,1),cex=cex.labs,text=c(timelab,valuelab,""),line=c(1.2,2.1,1.8))
      #points(tf,fplot,type='o',lty=1,cex=.6,lwd=1,pch=1,col=2);
      #if(limits=='TRUE') {points(tf,ulplot,type='l',lty=2,cex=0.6,lwd=.75,pch=1,col=4)
        #points(tf,llplot,type='l',lty=3,cex=0.6,lwd=.75,pch=1,col=4)
     # }
    #}
    np1=n+1
    nap1=n.ahead+1
    f=fplot[2:nap1]
    # Calculate RMSE and MAD
    # (computed only for lastn=TRUE and not returned -- dead code kept from
    # the original tswge function)
    if(lastn==TRUE){
      t.start=n-n.ahead
      sum.rmse=0
      sum.mad=0
      for(i in 1:n.ahead) {sum.rmse=sum.rmse+(f[i]-x[t.start+i])^2
      sum.mad=sum.mad+abs(f[i]-x[t.start+i])}
      mse=sum.rmse/n.ahead
      rmse=sqrt(mse)
      mad=sum.mad/n.ahead
    }
    ll=llplot[2:nap1]
    ul=ulplot[2:nap1]
    # Only the point forecasts are returned, regardless of lastn.
    if(lastn==TRUE){out1=list(f=f)}
    if(lastn==FALSE){out1=list(f=f)}
    return(out1)
  }

  
  numwindows = 0
  RMSEHolder = numeric()

  
# Pure ARMA case (no differencing, no seasonality): use the local forecaster.
if(s == 0 & d == 0)
{
  
  trainingSize = max(length(phi),length(theta)) + 1 # The plus 1 is for the backcast residuals which helps with ARMA model with q > 0
  numwindows = length(series)-(trainingSize + horizon) + 1   
  RMSEHolder = numeric(numwindows)

  # print(paste("Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with", numwindows, "windows."))
  
  for( i in 1:numwindows)
  {
    # `xbar =` partially matches the xbar2 argument (R partial matching), so
    # every window forecasts around the FULL-series mean, not the window mean.
    forecasts <- fore.arma.wge2(series[i:(i+(trainingSize-1))], plot = TRUE, phi = phi, theta = theta,n.ahead = horizon, xbar = mean(series))

    # RMSE of this window's horizon-step forecasts vs the observed values
    # immediately following the training window.
    RMSE = sqrt(mean((series[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2))
    
    RMSEHolder[i] = RMSE
  }
}
else
  {
    # Seasonal and/or differenced case: defer to tswge's fore.aruma.wge.
    trainingSize = sum(length(phi),length(theta),s, d) + 1 # sum and plus one is to help backcast.wge, lag.max and ylim plotting issue in fore.arima.wge
    numwindows = length(series)-(trainingSize + horizon) + 1
    RMSEHolder = numeric(numwindows)

    # print(paste("Please Hold For a Moment, TSWGE is processing the Rolling Window RMSE with", numwindows, "windows."))
    
    for( i in 1:numwindows)
    {
      #invisible(capture.output(forecasts <- fore.arima.wge(series[i:(i+(trainingSize-1))],phi = phis, theta = thetas, s = s, d = d,n.ahead = horizon)))
      forecasts <- fore.aruma.wge(series[i:(i+(trainingSize-1))],phi = phi, s = s, d = d, theta = theta,n.ahead = horizon, plot=plot)
      
      RMSE = sqrt(mean((series[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - forecasts$f)^2))
      
      RMSEHolder[i] = RMSE
      
    }
  }
  
  
  RMSEHolder
  #hist(RMSEHolder, main = "RMSEs for Individual Windows")
  # Headline statistic: the mean of the per-window RMSEs.
  WindowedRMSE = mean(RMSEHolder)
  
  # print("The Summary Statistics for the Rolling Window RMSE Are:")
  # print(summary(RMSEHolder))
  # print(paste("The Rolling Window RMSE is: ",round(WindowedRMSE,3)))

#output
  # Returned invisibly so interactive calls don't print the whole list.
  invisible(list(rwRMSE = WindowedRMSE, trainingSize = trainingSize, numwindows = numwindows, horizon = horizon, s = s, d = d, phi = phi, theta = theta, RMSEs = RMSEHolder))
  
  }